import os
import re
import time

import requests
from selenium.webdriver.common.by import By

from finance.selenium_test import getBrowser

# Default company keyword ("Gree Electric") used by the example calls under __main__.
company = '格力电器'
# Section 1: Eastmoney stock forum (guba) scraping.
""" 1. 东方财富网 - 股吧数据爬取 """
def getStockBar(stock_code='600519'):
    """Scrape post titles from the Eastmoney stock forum (guba) list page.

    Args:
        stock_code: Ticker used in the guba list URL. Defaults to '600519',
            the code hard-wired in the original implementation, so existing
            callers see identical behavior.

    Returns:
        list[str]: post titles found on the first list page (also printed).
    """
    browser = getBrowser()
    try:
        browser.get('http://guba.eastmoney.com/list,' + stock_code + '.html')
        data = browser.page_source
    finally:
        # Always release the webdriver, even if the page load fails;
        # the original leaked a browser process per call.
        browser.quit()

    # Each post title is carried in the title="" attribute of its link.
    p_title = '<a href=".*?" title="(.*?)"'
    title = re.findall(p_title, data)
    print(title)
    return title

"""2. 批量爬取多家公司资讯信息"""
def zixun(company):
    """Print news search results (title, link, date) for one company.

    Args:
        company: Company name to feed to the Eastmoney news search.
    """
    browser = getBrowser()
    try:
        browser.get('http://so.eastmoney.com/news/s?keyword=' + company)
        time.sleep(2)  # let the JS-rendered result list load before reading the DOM
        data = browser.page_source
    finally:
        browser.quit()  # original leaked the webdriver process

    p_title = '<div class="news_item"><div.*?><a href=".*?">(.*?)</a>'
    p_href = '<div class="news_item"><div.*?><a href="(.*?)">.*?</a>'
    p_date = '<span class="news_item_time">(.*?)</span>'
    title = re.findall(p_title, data)
    href = re.findall(p_href, data)
    date = re.findall(p_date, data, re.S)

    # The three lists come from independent regex scans; zip() stops at the
    # shortest, so a partially-rendered page can no longer raise IndexError
    # the way range(len(title)) indexing into date/href could.
    for i, (t, h, d) in enumerate(zip(title, href, date), start=1):
        t = re.sub('<.*?>', '', t)  # strip keyword-highlight tags inside the title
        d = d.split(' ')[0]         # keep only the date part of "date time"
        print(str(i) + '.' + t + ' - ' + d)
        print(h)

"""3. 获取公司研报 """
def yanbao(company):
    """Download every research-report PDF found for *company* on Eastmoney.

    Each PDF is saved as '<company>/<report title>.pdf'; a folder named after
    *company* must already exist next to the script (as in the original).

    Args:
        company: Company name used both as search keyword and output folder.
    """
    # 1. Load the research-report search results page.
    # A single webdriver is reused for the list page and every detail page;
    # the original created two browsers and quit neither.
    browser = getBrowser()
    try:
        browser.get('http://so.eastmoney.com/Yanbao/s?keyword=' + company)
        data = browser.page_source

        # 2. Collect the detail-page link of each listed report.
        p_href = '<div class="notice_item">.*?<a href="(.*?)"'
        href = re.findall(p_href, data)

        for link in href:
            # 3. Visit the detail page and extract title + PDF link.
            browser.get(link)
            data = browser.page_source

            p_name = '<h1>(.*?)</h1>'
            p_href_pdf = '<a class="rightlab" href="(.*?)">【点击查看PDF原文】</a>'
            href_pdf = re.findall(p_href_pdf, data, re.S)
            name = re.findall(p_name, data, re.S)
            if not name or not href_pdf:
                continue  # page layout didn't match; originally this raised IndexError

            title = re.sub(r'[\s]+', '', name[0])  # drop whitespace/newlines from the title

            # 4. Download the PDF (reports are large; this can take a while).
            res = requests.get(href_pdf[0])
            # os.path.join instead of a hard-coded '\\' so the path also
            # works outside Windows.
            path = os.path.join(company, title + '.pdf')
            with open(path, 'wb') as file:  # 'with' guarantees the handle is closed
                file.write(res.content)
    finally:
        browser.quit()

"""4. 问询函 """
def wenxunhan():
    """Placeholder: scrape SSE regulatory inquiry letters (not implemented)."""
    # Listing page the eventual implementation should crawl.
    url = 'http://www.sse.com.cn/disclosure/credibility/supervision/inquiries/'

"""5. 巨潮理财公告"""
def browserDownload():
    """Open a cninfo announcement detail page and click its download button.

    The file is saved by the browser's own download mechanism; this function
    returns nothing.
    """
    browser = getBrowser()
    # Request the announcement detail page (Kweichow Moutai, 600519).
    browser.get(
        'http://www.cninfo.com.cn/new/disclosure/detail?plate=sse&orgId=gssh0600519&stockCode=600519&announcementId=1208776647&announcementTime=2020-11-23%2008:21')
    time.sleep(1)  # give the page time to render the download button
    # Simulate a click on the download button.
    browser.find_element(By.XPATH, '//*[@id="noticeDetail"]/div/div[1]/div[3]/div[1]/button').click()
    time.sleep(5)  # wait for the download to (hopefully) finish
    # NOTE(review): the browser is deliberately not quit here, as in the
    # original — quitting could abort a still-running download. Confirm
    # whether an explicit quit after a longer wait is wanted.

if __name__ == '__main__':
    # Example invocations of the scrapers above; only the cninfo
    # announcement download is currently enabled.
    # getStockBar()
    # for name in ['贵州茅台']:
    #     zixun(name)
    # yanbao(company)
    browserDownload()